Setup

Load R libraries

library(data.table)
library(ggplot2)
library(ggpubr)
library(tidyr)
library(limma)
library(biomaRt)
library(fgsea)
library(goseq)

theme_set(theme_classic())

cell_type_name = params$cell_type_name
graph_weight = params$graph_weight

cell_type_name
## [1] "cd8"
graph_weight
## [1] "1.0"

Check enrichment of gene sets

Read in gene info and gene set assignments

file_tag = sprintf("%s_BL_%s", cell_type_name, graph_weight)

assayed_genes = scan(sprintf("output/gene_list_%s.txt", file_tag), 
                     what = character(), sep="\n")

gene_sets = scan(sprintf("output/name_s_%s.txt", file_tag), 
                 what = character(), sep="\n")

gene_sets = sapply(gene_sets, strsplit, USE.NAMES=FALSE, split=",")
n_genes   = sapply(gene_sets, length)
names(n_genes) = NULL
summary(n_genes)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    5.00   14.00   15.00   14.35   15.00   17.00
length(n_genes)
## [1] 40
sort(n_genes)
##  [1]  5 11 12 13 13 13 14 14 14 14 14 14 14 14 14 14 14 14 15 15 15 15 15 15 15
## [26] 15 15 15 15 15 15 15 15 15 16 16 16 17 17 17

Find gene symbols

Find gene symbols from bioMart.

All the gene symbols that can be found in bioMart are consistent with what we have. So no need to run it.

ensembl = useMart("ensembl", dataset = "hsapiens_gene_ensembl")

gene_BM = getBM(attributes = c("hgnc_symbol", "external_gene_name"), 
                filters = "external_gene_name", 
                values = assayed_genes, 
                mart = ensembl)
length(assayed_genes)
dim(gene_BM)
gene_BM[1:2,]

table(assayed_genes %in% gene_BM$external_gene_name)

t1 = table(gene_BM$external_gene_name)
dup = names(t1)[t1 > 1]
gene_BM[gene_BM$external_gene_name %in% dup,]

table(gene_BM$hgnc_symbol == gene_BM$external_gene_name)
w2kp = which(gene_BM$hgnc_symbol != gene_BM$external_gene_name)
gene_BM[w2kp,]

Find gene symbols using the alias2Symbol function from limma.

a2s = rep(NA, length(assayed_genes))
for(i in 1:length(assayed_genes)){
  gi = assayed_genes[i]
  ai = alias2Symbol(gi)
  if(length(ai) > 1){
    print(gi)
    print(ai)
  }
  a2s[i] = ai[1]
}
## [1] "QARS"
## [1] "EPRS1" "QARS1"
## [1] "SEPT2"
## [1] "SEPTIN6" "SEPTIN2"
table(is.na(a2s))
## 
## FALSE  TRUE 
##  1607    42
table(a2s == assayed_genes, useNA = 'ifany')
## 
## FALSE  TRUE  <NA> 
##    42  1565    42
gene_info = data.table(sym_in_data = assayed_genes, sym_limma = a2s)

gene_info[sym_in_data != sym_limma,]
##     sym_in_data sym_limma
##  1:    C10orf91 LINC02870
##  2:    C12orf10      MYG1
##  3:    C12orf45  NOPCHAP1
##  4:     C6orf48    SNHG32
##  5:     C6orf99 LINC02901
##  6:    CXorf40A     EOLA1
##  7:     CXorf57      RADX
##  8:     FAM102A     EEIG1
##  9:     FAM173A    ANTKMT
## 10:     FAM213B    PRXL2B
## 11:       H2AFX      H2AX
## 12:   HIST1H2AG    H2AC11
## 13:   HIST1H2BK    H2BC12
## 14:   HIST1H2BN    H2BC15
## 15:    HIST1H3A      H3C1
## 16:    HIST1H3H     H3C10
## 17:    HIST1H4C      H4C3
## 18:   HIST2H2BF    H2BC18
## 19:    KIAA0391     PRORP
## 20:        QARS     EPRS1
## 21:       SEPT6   SEPTIN6
## 22:       ARNTL     BMAL1
## 23:    C12orf65     MTRFR
## 24:    C16orf72   HAPSTR1
## 25:      CCDC84   CENATAC
## 26:      DOPEY2     DOP1B
## 27:     FAM126B     HYCC2
## 28:    FAM160B1    FHIP2A
## 29:        H1FX     H1-10
## 30:       H2AFJ      H2AJ
## 31:       HEXDC      HEXD
## 32:    HIST1H1C      H1-2
## 33:    HIST1H1D      H1-3
## 34:    HIST1H1E      H1-4
## 35:    KIAA1109     BLTP1
## 36:    KIAA1551     RESF1
## 37:        MKL1     MRTFA
## 38:       NARFL     CIAO3
## 39:       SEPT2   SEPTIN6
## 40:      TARSL2     TARS3
## 41:      TMEM8A     PGAP6
## 42:       WDR60   DYNC2I1
##     sym_in_data sym_limma
gene_info[, gene_symbol := sym_in_data]
gene_info[which(sym_in_data != sym_limma), gene_symbol := sym_limma]

dim(gene_info)
## [1] 1649    3
gene_info[1:5,]
##    sym_in_data sym_limma gene_symbol
## 1:      ABLIM1    ABLIM1      ABLIM1
## 2:  AC004687.1      <NA>  AC004687.1
## 3:  AC004854.2      <NA>  AC004854.2
## 4:  AC007384.1      <NA>  AC007384.1
## 5:  AC007952.4      <NA>  AC007952.4
t1 = table(gene_info$gene_symbol)
table(t1)
## t1
##    1    2 
## 1647    1
gene_info[gene_symbol %in% names(t1)[t1 == 2],]
##    sym_in_data sym_limma gene_symbol
## 1:       SEPT6   SEPTIN6     SEPTIN6
## 2:       SEPT2   SEPTIN6     SEPTIN6
gene_info[sym_in_data == "HIST1H2BC", gene_symbol:="H2BC4"]
gene_info[sym_in_data == "SEPT6", gene_symbol:="SEPTIN6"]
gene_info[sym_in_data == "SEPT2", gene_symbol:="SEPTIN2"]

Prepare gene set information

Gene set annotations (by gene symbols) were downloaded from MSigDB website.

gmtfile = list()
gmtfile[["reactome"]] = "../Annotation/c2.cp.reactome.v2023.2.Hs.symbols.gmt"
gmtfile[["go_bp"]]    = "../Annotation/c5.go.bp.v2023.2.Hs.symbols.gmt"
gmtfile[["immune"]]   = "../Annotation/c7.all.v2023.2.Hs.symbols.gmt"

pathways = list()
for(k1 in names(gmtfile)){
  pathways[[k1]] = gmtPathways(gmtfile[[k1]])
}

names(pathways)
## [1] "reactome" "go_bp"    "immune"
sapply(pathways, length)
## reactome    go_bp   immune 
##     1692     7647     5219

Filter gene sets for size between 10 and 500.

lapply(pathways, function(v){
  quantile(sapply(v, length), probs = seq(0, 1, 0.1), na.rm = TRUE)
})
## $reactome
##     0%    10%    20%    30%    40%    50%    60%    70%    80%    90%   100% 
##    5.0    7.0    9.0   12.0   17.0   23.0   31.0   44.0   71.8  120.9 1463.0 
## 
## $go_bp
##     0%    10%    20%    30%    40%    50%    60%    70%    80%    90%   100% 
##    5.0    6.0    8.0   10.0   14.0   19.0   29.0   46.0   80.8  183.0 1966.0 
## 
## $immune
##   0%  10%  20%  30%  40%  50%  60%  70%  80%  90% 100% 
##    5  162  193  197  199  199  200  200  200  200 1992
for(k1 in names(pathways)){
  p1 = pathways[[k1]]
  pathways[[k1]] = p1[sapply(p1, length) %in% 10:500]
}

Conduct enrichment analysis

dim(gene_info)
## [1] 1649    3
max_n2kp = 10

goseq_res = NULL

for(k in 1:length(gene_sets)){
  if(length(gene_sets[[k]]) < 10) { next }
  
  print(k)
  set_k = paste0("set_", k)
  print(gene_sets[[k]])
  
  genes = gene_info$sym_in_data %in% gene_sets[[k]]
  names(genes) = gene_info$gene_symbol
  table(genes)
  
  pwf = nullp(genes, "hg38", "geneSymbol")

  for(k1 in names(pathways)){
    p1 = pathways[[k1]]
    res1 = goseq(pwf, "hg38", "geneSymbol", 
                 gene2cat=goseq:::reversemapping(p1))
    res1$FDR  = p.adjust(res1$over_represented_pvalue, method="BH")
    
    nD = sum(res1$FDR < 0.1)
    
    if(nD > 0){
      res1 = res1[order(res1$FDR),][1:min(nD, max_n2kp),]
      res1$category = gsub("REACTOME_|GOBP_", "", res1$category)
      res1$category = gsub("_", " ", res1$category)
      res1$category = tolower(res1$category)
      res1$category = substr(res1$category, start=1, stop=81)
      goseq_res[[set_k]][[k1]] = res1
    }
  }
}
## [1] 1
##  [1] "AC087623.3" "AMD1"       "CITED2"     "COG5"       "GLTP"      
##  [6] "IFRD1"      "ITGAE"      "MAPRE2"     "RSRP1"      "THAP9-AS1" 
## [11] "WSB1"       "ZFP36L1"    "NEAT1"      "RASGRP1"

## [1] 2
##  [1] "CCL4L2"  "ARHGEF3" "ARL4C"   "CLEC16A" "COL6A2"  "COL6A3"  "DDIT4"  
##  [8] "ISG20"   "ITM2A"   "LAIR2"   "LPIN1"   "REXO1"   "TARSL2"  "TRAV4"  
## [15] "XCL1"

## [1] 3
##  [1] "ATP5F1A"      "C10orf91"     "COQ8A"        "EPB41L4A-AS1" "SNHG12"      
##  [6] "BROX"         "DMTF1"        "MTERF2"       "PCED1B"       "PPP4R3B"     
## [11] "PTGDS"        "RNF19A"       "SLF2"         "TMEM127"      "TRBV7-6"     
## [16] "TSPAN32"      "VPS13C"

## [1] 4
##  [1] "AC245297.3" "AF213884.3" "CXorf40A"   "KCNQ1OT1"   "KMT2E-AS1" 
##  [6] "MATR3-1"    "NPIPB4"     "TRAV8-4"    "TRBV28"     "TRBV7-9"   
## [11] "TRGV7"      "GK5"        "MIAT"       "PARP15"     "SPATA13"

## [1] 5
##  [1] "AC044849.1"  "MZF1-AS1"    "TRBV6-1"     "ABCC10"      "AP005482.1" 
##  [6] "CPPED1"      "DENND4B"     "ELMOD3"      "LINC02256"   "MINDY2"     
## [11] "NBEAL2"      "SEC14L1"     "THUMPD3-AS1" "TRBV2"       "TRGV10"     
## [16] "Z93930.2"    "ZNF808"

## [1] 6
##  [1] "C12orf45" "CD28"     "GSTM1"    "NR1D1"    "PDE7A"    "TSPYL4"  
##  [7] "ANKRD12"  "ANKRD36C" "NRDC"     "PCSK7"    "PPM1K"    "SLFN12L" 
## [13] "TRIM38"   "UNC13D"   "VTI1A"

## [1] 7
##  [1] "CAMK4"  "KCTD7"  "KLRK1"  "MAP3K2" "MZT2B"  "NUAK2"  "WARS2"  "CHD9"  
##  [9] "ERICH1" "IRF9"   "MYO1F"  "NLRC3"  "RASA3"  "STK10"

## [1] 8
##  [1] "ALKBH7"  "C6orf48" "EPS8"    "RCAN3"   "TXK"     "GNPTAB"  "KLF2"   
##  [8] "MAN2C1"  "PDE4B"   "RIC3"    "RNF125"  "SCRN3"   "TOB1"    "TRGV4"  
## [15] "ZBP1"

## [1] 9
##  [1] "SNHG9"      "ADCY7"      "CCDC84"     "ERBIN"      "FAM78A"    
##  [6] "HPS4"       "KIAA1109"   "NUTM2B-AS1" "RAB27B"     "SIDT1"     
## [11] "SLCO3A1"    "SYTL3"      "TRDV1"      "TTC38"      "ZNF652"

## [1] 10
##  [1] "ABCA5"      "AC092683.1" "DDX3Y"      "EIF1AY"     "HECTD4"    
##  [6] "KDM5D"      "RPS4Y1"     "SBNO2"      "TRAV17"     "TTTY15"    
## [11] "UTY"

## [1] 11
##  [1] "BEX4"     "CCR7"     "CD40LG"   "CREBL2"   "CRTAM"    "FAM102A" 
##  [7] "FXYD7"    "HIST1H3H" "SDR42E2"  "WDR86"    "EIF4E3"   "PIK3CD"  
## [13] "PYROXD1"  "RUBCN"    "SAMD9L"   "TTC16"

## [1] 12
##  [1] "LEF1"    "SELL"    "ZFAND1"  "ACAP3"   "ATAD2"   "CARD11"  "CHD1"   
##  [8] "FAM53B"  "MSI2"    "PIP4K2B" "PKD1"    "RNF157"  "VCAN"

## [1] 13
##  [1] "CHRM3-AS2" "RETREG1"   "BMT2"      "FAM133B"   "HRH2"      "IGKV3-15" 
##  [7] "LINC02384" "PUM3"      "S100A12"   "TENT5C"    "THAP5"     "TRAV12-3" 
## [13] "TRAV19"    "TRBV11-2"  "TRBV4-2"

## [1] 14
##  [1] "AL118516.1" "APMAP"      "CMC1"       "EFCAB2"     "FAM173A"   
##  [6] "KIR3DL2"    "MATK"       "PITPNC1"    "PRR7"       "IQGAP2"    
## [11] "RLF"        "TBC1D2B"    "TRGC2"      "TUT7"       "XIST"

## [1] 15
##  [1] "LINC00402" "SESN2"     "YPEL3"     "CX3CR1"    "GCA"       "LSS"      
##  [7] "MKL1"      "NLRC5"     "ODF3B"     "PATL2"     "PDZD4"     "PEX26"    
## [13] "SZT2"      "ZMIZ2"     "ZNF557"

## [1] 16
##  [1] "GALNT11"  "MYLIP"    "ANKRD36"  "ARHGAP10" "ASCL2"    "CD46"    
##  [7] "GON4L"    "MBD5"     "PDE4D"    "PHF14"    "PLAC8"    "RAP1GAP2"
## [13] "SENP7"    "TTC17"

## [1] 17
##  [1] "ASL"        "FAM213B"    "AC020659.1" "C2CD3"      "CCDC112"   
##  [6] "DDX60L"     "EPSTI1"     "ETFDH"      "IFI44L"     "IRAK4"     
## [11] "MX2"        "RREB1"      "TRDC"       "XAF1"

## [1] 18
##  [1] "LRRN3"     "PAPSS1"    "PDCD4-AS1" "TBCCD1"    "TESPA1"    "TMEM204"  
##  [7] "TRAV21"    "DOCK10"    "ERAP2"     "FGFBP2"    "GALNT3"    "IFI27"    
## [13] "MIGA1"     "OAS2"      "ZNF683"

## [1] 19
##  [1] "CLDND1"  "CPNE1"   "GZMK"    "HIKESHI" "KIF9"    "PIK3IP1" "SLC38A2"
##  [8] "UIMC1"   "APOL6"   "TUT4"    "VPS13B"  "ZBTB40"  "ZDHHC5"

## [1] 20
##  [1] "NXT2"    "TCEA3"   "ZNF575"  "ABR"     "ARAP1"   "BCL9L"   "C5orf24"
##  [8] "FAM13B"  "GPR141"  "GRK2"    "INPP4A"  "INPP5D"  "SSBP3"   "TRAV27" 
## [15] "ZNF292"  "ZNF493"

## [1] 21
##  [1] "AC245014.3" "AL138963.3" "DTHD1"      "LST1"       "NBPF14"    
##  [6] "RGL4"       "TC2N"       "TRAV13-1"   "TRAV8-2"    "TRBV9"     
## [11] "C16orf72"   "GABPB2"     "HECA"       "PCNX1"      "ZNF236"

## [1] 22
##  [1] "AC008555.5" "ARRDC2"     "LRRC23"     "NOCT"       "NUP58"     
##  [6] "PGGHG"      "TRAV14DV4"  "TRGV8"      "ZNF749"     "AC116407.2"
## [11] "BICRAL"     "GPR132"     "MYBL1"      "POLR2J3-1"  "TRANK1"

## [1] 23
##  [1] "AIF1"    "CD27"    "GIMAP1"  "MSC"     "PCMTD2"  "PDE3B"   "PLK2"   
##  [8] "SLC38A1" "TMEM107" "CASP10"  "COX19"   "DUS1L"   "ITGAM"   "YPEL1"

## [1] 24
##  [1] "AC119396.1" "ANXA2R"     "BNIP3L"     "C12orf10"   "CMTM7"     
##  [6] "DYRK4"      "EPHX2"      "HIBADH"     "NT5C3B"     "PPP1R15B"  
## [11] "RGCC"       "SNHG15"     "SNHG8"      "ZNF10"

## [1] 25
##  [1] "BTG1"     "HIST1H4C" "LTB"      "RGS10"    "TRBC1"    "CD38"    
##  [7] "GBP1"     "GBP5"     "IFITM2"   "LAG3"     "MIDN"     "NKG7"    
## [13] "SLA2"     "STK17B"

## [1] 26
##  [1] "AC004687.1" "CD84"       "CXXC5"      "IER5"       "NCR1"      
##  [6] "RAB33B"     "RPS26"      "TCP11L2"    "TOX"        "TRAV5"     
## [11] "ZFAS1"      "CYTOR"      "MCTP2"      "RUFY2"

## [1] 27
##  [1] "IL6R"    "NOSIP"   "ARHGEF9" "CISH"    "CLUH"    "GPHN"    "IL6ST"  
##  [8] "MYO9A"   "NAA25"   "OSM"     "PARP9"   "RALGAPB" "SYNRG"   "ZNF318" 
## [15] "ZNF407"

## [1] 28
##  [1] "BTG2"     "CCNB1IP1" "GCSAM"    "LDLRAP1"  "SERINC5"  "TCF7"    
##  [7] "WDR54"    "ABCA7"    "DIAPH2"   "EHMT1"    "HIPK3"    "KIAA2026"
## [13] "NEK9"     "USP16"

## [1] 29
##  [1] "COA1"      "SLC2A3"    "TMIGD2"    "AP3B1"     "AP3M2"     "CEMIP2"   
##  [7] "EHBP1L1"   "FGL2"      "HEXDC"     "INO80D"    "LINC02446" "MAF"      
## [13] "OXNAD1"    "TMEM131L"

## [1] 30
##  [1] "AC004854.2" "AC087239.1" "AL136454.1" "AL627171.1" "BX284668.6"
##  [6] "LINC02273"  "MCUB"       "MFNG"       "RGS1"       "Z93241.1"  
## [11] "CRYBG1"     "GCN1"       "GDPD5"      "MARF1"      "MICAL2"    
## [16] "PSMA3-AS1"  "ZNF83"

## [1] 31
##  [1] "AC012645.3" "AC016405.3" "AC020911.2" "AC027644.3" "AC083798.2"
##  [6] "AC091271.1" "AK5"        "AL135791.1" "ARF4-AS1"   "LINC01465" 
## [11] "PRAG1"      "TRAV12-2"   "TRAV8-3"    "TRBV3-1"    "TRBV6-2"   
## [16] "ZNF862"

## [1] 32
##  [1] "CARHSP1"  "CRLF3"    "CYB561A3" "NT5DC1"   "ARAP2"    "ARHGAP30"
##  [7] "DGKD"     "HIPK1"    "HSH2D"    "NCKAP1L"  "NECAP1"   "PRR14L"  
## [13] "R3HCC1L"  "SETX"     "TMEM8A"

## [1] 33
##  [1] "AC015982.1" "AC025164.1" "AC083880.1" "AL121944.1" "AL451085.1"
##  [6] "ARMH1"      "ATP2B1-AS1" "CSKMT"      "CXorf57"    "HIPK1-AS1" 
## [11] "ILF3-DT"    "INPP4B"     "OSER1-DT"   "TRABD2A"

## [1] 34
##  [1] "SESN1"       "TRBV20-1"    "TRBV7-2"     "ADGRE5"      "AKNA"       
##  [6] "ARHGAP45"    "ENOSF1"      "IQCG"        "NFATC3"      "OGA"        
## [11] "PIK3R5"      "SLC16A1-AS1" "SUSD6"       "TRAV9-2"     "XCL2"

## [1] 35
##  [1] "AOAH"        "FAM118A"     "HLA-DMB"     "KLRC3"       "KLRF1"      
##  [6] "MTRNR2L8"    "TRAV38-2DV8" "TRBV6-5"     "ADGRG1"      "FAM126B"    
## [11] "FCRL6"       "LILRB1"      "TMEM181"

## [1] 36
##  [1] "AC007952.4" "AC103591.3" "AL357060.1" "BBS9"       "C6orf99"   
##  [6] "HELQ"       "IGKV3-20"   "IGLV1-44"   "INTS6L"     "JAML"      
## [11] "LINC00649"  "TNFRSF25"   "TRAV1-2"    "TRAV3"      "TRGV5"

## [1] 37
##  [1] "AC007384.1" "AC025171.3" "AL139246.5" "ARL4A"      "GADD45B"   
##  [6] "ID1"        "NR4A2"      "NR4A3"      "AC016831.7" "ELMO1"     
## [11] "FAM160B1"   "GPRIN3"     "NARFL"      "PRR5L"      "ZBTB20"

## [1] 39
##  [1] "ARHGAP9" "EOMES"   "FCRL3"   "TRG-AS1" "TRGV9"   "ATAD2B"  "CCL4"   
##  [8] "CREBZF"  "CST7"    "CTSW"    "GPR174"  "ITGAL"   "MYO1G"   "NFE2L1"

## [1] 40
##  [1] "ADAM17"  "ADHFE1"  "CD226"   "CHD6"    "ETNK1"   "LONP2"   "MPPE1"  
##  [8] "SEMA4D"  "ST8SIA4" "SUSD1"   "TGFBR3"  "WDTC1"

for(n1 in names(goseq_res)){
  k = as.numeric(gsub("set_", "", n1))
  print(n1)
  print(gene_sets[[k]])
  print(goseq_res[[n1]])

}
## [1] "set_2"
##  [1] "CCL4L2"  "ARHGEF3" "ARL4C"   "CLEC16A" "COL6A2"  "COL6A3"  "DDIT4"  
##  [8] "ISG20"   "ITM2A"   "LAIR2"   "LPIN1"   "REXO1"   "TARSL2"  "TRAV4"  
## [15] "XCL1"   
## $reactome
##                                                         category
## 159                  collagen biosynthesis and modifying enzymes
## 160                                 collagen chain trimerization
## 590                                           ncam1 interactions
## 72  assembly of collagen fibrils and other multimeric structures
## 162                                           collagen formation
##     over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 159            8.097194e-05                1.0000000          2        2
## 160            8.097194e-05                1.0000000          2        2
## 590            2.416333e-04                0.9999994          2        3
## 72             2.416852e-04                0.9999994          2        3
## 162            2.416852e-04                0.9999994          2        3
##            FDR
## 159 0.04708518
## 160 0.04708518
## 590 0.05621598
## 72  0.05621598
## 162 0.05621598
## 
## [1] "set_8"
##  [1] "ALKBH7"  "C6orf48" "EPS8"    "RCAN3"   "TXK"     "GNPTAB"  "KLF2"   
##  [8] "MAN2C1"  "PDE4B"   "RIC3"    "RNF125"  "SCRN3"   "TOB1"    "TRGV4"  
## [15] "ZBP1"   
## $immune
##                                         category over_represented_pvalue
## 1245 gse17974 0h vs 2h in vitro act cd4 tcell up             1.28182e-05
##      under_represented_pvalue numDEInCat numInCat        FDR
## 1245                0.9999996          5       41 0.06534718
## 
## [1] "set_10"
##  [1] "ABCA5"      "AC092683.1" "DDX3Y"      "EIF1AY"     "HECTD4"    
##  [6] "KDM5D"      "RPS4Y1"     "SBNO2"      "TRAV17"     "TTTY15"    
## [11] "UTY"       
## $immune
##                                                  category
## 4352 gse5099 classical m1 vs alternative m2 macrophage dn
##      over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 4352            3.246038e-06                        1          4       20
##            FDR
## 4352 0.0165483
## 
## [1] "set_17"
##  [1] "ASL"        "FAM213B"    "AC020659.1" "C2CD3"      "CCDC112"   
##  [6] "DDX60L"     "EPSTI1"     "ETFDH"      "IFI44L"     "IRAK4"     
## [11] "MX2"        "RREB1"      "TRDC"       "XAF1"      
## $immune
##                                                                      category
## 12   erwin cohen blood vaccine tc 83 age 23 48yo vaccinated vs control 7dy up
## 383                               gse13485 day1 vs day7 yf17d vaccine pbmc dn
## 472                            gse14000 unstim vs 4h lps dc translated rna dn
## 381                               gse13485 day1 vs day3 yf17d vaccine pbmc dn
## 10           erwin cohen blood tc 83 age 23 48yo vaccinated vs control 2dy up
## 1344                             gse18791 unstim vs newcatsle virus dc 18h dn
## 375                               gse13485 ctrl vs day3 yf17d vaccine pbmc dn
## 471                                           gse14000 unstim vs 4h lps dc dn
## 391                            gse13485 pre vs post yf17d vaccination pbmc dn
## 387                               gse13485 day3 vs day7 yf17d vaccine pbmc dn
##      over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 12              4.887953e-06                0.9999999          5       36
## 383             1.175114e-05                0.9999996          5       41
## 472             1.239645e-05                0.9999996          5       44
## 381             1.357466e-05                0.9999997          4       21
## 10              1.641272e-05                0.9999995          5       45
## 1344            1.707362e-05                0.9999994          5       45
## 375             1.805806e-05                0.9999994          5       46
## 471             2.699099e-05                0.9999990          5       51
## 391             2.753164e-05                0.9999990          5       49
## 387             3.496352e-05                0.9999986          5       53
##             FDR
## 12   0.01315143
## 383  0.01315143
## 472  0.01315143
## 381  0.01315143
## 10   0.01315143
## 1344 0.01315143
## 375  0.01315143
## 471  0.01388303
## 391  0.01388303
## 387  0.01388303
## 
## [1] "set_27"
##  [1] "IL6R"    "NOSIP"   "ARHGEF9" "CISH"    "CLUH"    "GPHN"    "IL6ST"  
##  [8] "MYO9A"   "NAA25"   "OSM"     "PARP9"   "RALGAPB" "SYNRG"   "ZNF318" 
## [15] "ZNF407" 
## $go_bp
##                                                             category
## 2937 positive regulation of tyrosine phosphorylation of stat protein
## 4595                        tyrosine phosphorylation of stat protein
##      over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 2937            1.570789e-06                1.0000000          4        9
## 4595            4.838054e-06                0.9999999          4       11
##              FDR
## 2937 0.007368573
## 4595 0.011347655
## 
## [1] "set_35"
##  [1] "AOAH"        "FAM118A"     "HLA-DMB"     "KLRC3"       "KLRF1"      
##  [6] "MTRNR2L8"    "TRAV38-2DV8" "TRBV6-5"     "ADGRG1"      "FAM126B"    
## [11] "FCRL6"       "LILRB1"      "TMEM181"    
## $immune
##                                                     category
## 4250 gse45739 unstim vs acd3 acd28 stim nras ko cd4 tcell dn
##      over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 4250            9.944221e-06                0.9999997          5       54
##             FDR
## 4250 0.05069564
## 
## [1] "set_36"
##  [1] "AC007952.4" "AC103591.3" "AL357060.1" "BBS9"       "C6orf99"   
##  [6] "HELQ"       "IGKV3-20"   "IGLV1-44"   "INTS6L"     "JAML"      
## [11] "LINC00649"  "TNFRSF25"   "TRAV1-2"    "TRAV3"      "TRGV5"     
## $reactome
##                                                                     category
## 180                                         creation of c4 and c2 activators
## 444                                         initial triggering of complement
## 900                                           scavenging of heme from plasma
## 132                           cell surface interactions at the vascular wall
## 121                                             cd22 mediated bcr regulation
## 435 immunoregulatory interactions between a lymphoid and a non lymphoid cell
## 310                                                          fcgr activation
## 163                                                       complement cascade
## 872                                    role of phospholipids in phagocytosis
## 96                      binding and uptake of ligands by scavenger receptors
##     over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 180            0.0001102635                0.9999998          2        4
## 444            0.0001102635                0.9999998          2        4
## 900            0.0001102635                0.9999998          2        4
## 132            0.0001161525                0.9999988          3       25
## 121            0.0001809758                0.9999995          2        5
## 435            0.0003021830                0.9999954          3       34
## 310            0.0003794161                0.9999982          2        7
## 163            0.0003797855                0.9999982          2        7
## 872            0.0003823279                0.9999982          2        7
## 96             0.0003835763                0.9999981          2        7
##            FDR
## 180 0.03377133
## 444 0.03377133
## 900 0.03377133
## 132 0.03377133
## 121 0.04209496
## 435 0.04460992
## 310 0.04460992
## 163 0.04460992
## 872 0.04460992
## 96  0.04460992
saveRDS(goseq_res, sprintf("output/gene_set_enrichments_%s.RDS", 
                           file_tag))

Session information

gc()
##            used  (Mb) gc trigger  (Mb) limit (Mb) max used  (Mb)
## Ncells  8958253 478.5   16391124 875.4         NA 16391124 875.4
## Vcells 19167796 146.3   59973464 457.6      65536 77218972 589.2
sessionInfo()
## R version 4.2.3 (2023-03-15)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.4.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] TxDb.Hsapiens.UCSC.hg38.knownGene_3.16.0
##  [2] GenomicFeatures_1.50.4                  
##  [3] GenomicRanges_1.50.2                    
##  [4] GenomeInfoDb_1.34.9                     
##  [5] org.Hs.eg.db_3.16.0                     
##  [6] AnnotationDbi_1.60.2                    
##  [7] IRanges_2.32.0                          
##  [8] S4Vectors_0.36.2                        
##  [9] Biobase_2.58.0                          
## [10] BiocGenerics_0.44.0                     
## [11] goseq_1.50.0                            
## [12] geneLenDataBase_1.34.0                  
## [13] BiasedUrn_2.0.10                        
## [14] fgsea_1.24.0                            
## [15] biomaRt_2.54.1                          
## [16] limma_3.54.2                            
## [17] tidyr_1.3.0                             
## [18] ggpubr_0.6.0                            
## [19] ggplot2_3.4.2                           
## [20] data.table_1.14.8                       
## 
## loaded via a namespace (and not attached):
##  [1] nlme_3.1-162                matrixStats_1.0.0          
##  [3] bitops_1.0-7                bit64_4.0.5                
##  [5] filelock_1.0.2              progress_1.2.2             
##  [7] httr_1.4.6                  tools_4.2.3                
##  [9] backports_1.4.1             bslib_0.4.2                
## [11] utf8_1.2.3                  R6_2.5.1                   
## [13] mgcv_1.8-42                 DBI_1.1.3                  
## [15] colorspace_2.1-0            withr_2.5.0                
## [17] tidyselect_1.2.0            prettyunits_1.1.1          
## [19] bit_4.0.5                   curl_5.0.1                 
## [21] compiler_4.2.3              cli_3.6.1                  
## [23] xml2_1.3.4                  DelayedArray_0.24.0        
## [25] rtracklayer_1.58.0          sass_0.4.5                 
## [27] scales_1.2.1                rappdirs_0.3.3             
## [29] Rsamtools_2.14.0            stringr_1.5.0              
## [31] digest_0.6.31               rmarkdown_2.21             
## [33] XVector_0.38.0              pkgconfig_2.0.3            
## [35] htmltools_0.5.5             MatrixGenerics_1.10.0      
## [37] dbplyr_2.3.2                fastmap_1.1.1              
## [39] rlang_1.1.0                 rstudioapi_0.14            
## [41] RSQLite_2.3.1               BiocIO_1.8.0               
## [43] jquerylib_0.1.4             generics_0.1.3             
## [45] jsonlite_1.8.4              BiocParallel_1.32.6        
## [47] dplyr_1.1.2                 car_3.1-2                  
## [49] RCurl_1.98-1.12             magrittr_2.0.3             
## [51] GO.db_3.16.0                GenomeInfoDbData_1.2.9     
## [53] Matrix_1.6-4                Rcpp_1.0.10                
## [55] munsell_0.5.0               fansi_1.0.4                
## [57] abind_1.4-5                 lifecycle_1.0.3            
## [59] stringi_1.7.12              yaml_2.3.7                 
## [61] carData_3.0-5               SummarizedExperiment_1.28.0
## [63] zlibbioc_1.44.0             BiocFileCache_2.6.1        
## [65] grid_4.2.3                  blob_1.2.4                 
## [67] parallel_4.2.3              crayon_1.5.2               
## [69] lattice_0.20-45             splines_4.2.3              
## [71] Biostrings_2.66.0           cowplot_1.1.1              
## [73] hms_1.1.3                   KEGGREST_1.38.0            
## [75] knitr_1.44                  pillar_1.9.0               
## [77] rjson_0.2.21                ggsignif_0.6.4             
## [79] codetools_0.2-19            fastmatch_1.1-3            
## [81] XML_3.99-0.14               glue_1.6.2                 
## [83] evaluate_0.20               png_0.1-8                  
## [85] vctrs_0.6.2                 gtable_0.3.3               
## [87] purrr_1.0.1                 cachem_1.0.7               
## [89] xfun_0.39                   broom_1.0.4                
## [91] restfulr_0.0.15             rstatix_0.7.2              
## [93] tibble_3.2.1                GenomicAlignments_1.34.1   
## [95] memoise_2.0.1